

library(rio)
library(tidyverse)
library(knitr)
library(kableExtra)

setwd("~/02_table1/")

io <- import("io_articles.xlsx") 


# how many conducted a statistical analysis?
io %>%
  filter(`Statistics (excl. survey/experiment)`==1) %>%
  summarise(stats = sum(`Statistics (excl. survey/experiment)`))


table_by_source <- io %>%
  filter(`Uses contemporary macroeconomic data`==1) %>%
  mutate(`Mentions data vintage` = ifelse(is.na(`Mentions data vintage`), 0, `Mentions data vintage`),
         `Uses WB data` = ifelse(is.na(`Uses WB data`) | `Uses WB data` == "NA", 0, `Uses WB data`),
         `Uses WB data` = as.numeric(`Uses WB data`),
         `Uses PWT data` = ifelse(is.na(`Uses PWT data`) | `Uses PWT data` == "NA", 0, `Uses PWT data`),
         `Uses PWT data` = as.numeric(`Uses PWT data`),
         `Uses Maddison data` = ifelse(is.na(`Uses Maddison data`) | `Uses Maddison data` == "NA", 0, `Uses Maddison data`),
         `Uses Maddison data` = as.numeric(`Uses Maddison data`),
         `Uses PWT 5.6` = ifelse(`If yes, what PWT vintage?` == 5.6, 1, 0),
         `Uses PWT 5.6` = as.numeric(`Uses PWT 5.6`),
         `Uses PWT 5.6` = ifelse(is.na(`Uses PWT 5.6`), 0, `Uses PWT 5.6`),
         `Uses PWT 6.1` = ifelse(`If yes, what PWT vintage?` == 6.1 | `If yes, what PWT vintage?` == 6.2 | `If yes, what PWT vintage?` == 6.3, 1, 0),
         `Uses PWT 6.1` = as.numeric(`Uses PWT 6.1`),
         `Uses PWT 6.1` = ifelse(is.na(`Uses PWT 6.1`), 0, `Uses PWT 6.1`),
         `Uses PWT 7` = ifelse(`If yes, what PWT vintage?` == 7 | `If yes, what PWT vintage?` == 7.1, 1, 0),
         `Uses PWT 7` = as.numeric(`Uses PWT 7`),
         `Uses PWT 7` = ifelse(is.na(`Uses PWT 7`), 0, `Uses PWT 7`),
         `Uses PWT 8` = ifelse(`If yes, what PWT vintage?` == 8, 1, 0),
         `Uses PWT 8` = as.numeric(`Uses PWT 8`),
         `Uses PWT 8` = ifelse(is.na(`Uses PWT 8`), 0, `Uses PWT 8`),
         `Uses WB 2005` = ifelse(`If yes, what WB vintage?` <= 2005, 1, 0),
         `Uses WB 2005` = as.numeric(`Uses WB 2005`),
         `Uses WB 2005` = ifelse(is.na(`Uses WB 2005`), 0, `Uses WB 2005`),
         `Uses WB 2010` = ifelse(`If yes, what WB vintage?` > 2005 & `If yes, what WB vintage?` <= 2010, 1, 0),
         `Uses WB 2010` = as.numeric(`Uses WB 2010`),
         `Uses WB 2010` = ifelse(is.na(`Uses WB 2010`), 0, `Uses WB 2010`),
         `Uses WB 2015` = ifelse(`If yes, what WB vintage?` > 2010 & `If yes, what WB vintage?` <=2015, 1, 0),
         `Uses WB 2015` = as.numeric(`Uses WB 2015`),
         `Uses WB 2015` = ifelse(is.na(`Uses WB 2015`), 0, `Uses WB 2015`),
         `Uses WB 2020` = ifelse(`If yes, what WB vintage?` > 2015 & `If yes, what WB vintage?` <=2020, 1, 0),
         `Uses WB 2020` = as.numeric(`Uses WB 2020`),
         `Uses WB 2020` = ifelse(is.na(`Uses WB 2020`), 0, `Uses WB 2020`),
         `Uses Maddison 2003` = ifelse(`If yes, what Maddison vintage?`==2003, 1, 0),
         `Uses Maddison 2003` = as.numeric(`Uses Maddison 2003`),
         `Uses Maddison 2003` = ifelse(is.na(`Uses Maddison 2003`), 0, `Uses Maddison 2003`),
         `Uses Maddison 2007` = ifelse(`If yes, what Maddison vintage?`==2007, 1, 0),
         `Uses Maddison 2007` = as.numeric(`Uses Maddison 2007`),
         `Uses Maddison 2007` = ifelse(is.na(`Uses Maddison 2007`), 0, `Uses Maddison 2007`),
         `Uses Maddison 2010` = ifelse(`If yes, what Maddison vintage?`==2010, 1, 0),
         `Uses Maddison 2010` = as.numeric(`Uses Maddison 2010`),
         `Uses Maddison 2010` = ifelse(is.na(`Uses Maddison 2010`), 0, `Uses Maddison 2010`),
         `Other` = ifelse(!is.na(`If not, what sources?`), 1, 0)) %>%
  summarise(`Use Relevant Data` = sum(`Uses contemporary macroeconomic data`),
            `Mention Data Source` = sum(`Mentions data source`),
            `   World Development Indicators` = sum(`Uses WB data`),
            `   Penn World Table` = sum(`Uses PWT data`),
            `   Maddison` = sum(`Uses Maddison data`),
            `   Other` = sum(`Other`),
            `Mention Data Vintage` = sum(`Mentions data vintage`),
            `   World Development Indicators until 2005` = sum(`Uses WB 2005`),
            `   World Development Indicators 2006--2010` = sum(`Uses WB 2010`),
            `   World Development Indicators 2011--2015` = sum(`Uses WB 2015`),
            `   World Development Indicators 2016--2020` = sum(`Uses WB 2020`),
            `   Penn World Table 5.6` = sum(`Uses PWT 5.6`),
            `   Penn World Table 6.1--6.3` = sum(`Uses PWT 6.1`),
            `   Penn World Table 7.0--7.1` = sum(`Uses PWT 7`),
            `   Penn World Table 8.0` = sum(`Uses PWT 8`),
            `   Maddison 2003` = sum(`Uses Maddison 2003`),
            `   Maddison 2007` = sum(`Uses Maddison 2007`),
            `   Maddison 2010` = sum(`Uses Maddison 2010`)) %>%
  pivot_longer(cols = 1:18, names_to = " ", values_to = "Number of Studies") %>%
  mutate(Percentage = round(`Number of Studies`/173*100, digits = 2))

kable(table_by_source, "latex")


# how many use PWT data via Gleditsch?
io %>%
  filter(str_detect(Notes, "PWT via Gleditsch 2002")) %>%
  summarise(stats = length(Notes))
